#First pass at EITC Validation graphs
library(readr)
library(haven)
library(tidyverse)
library(magrittr)

#Define functions for later use
add_narm <- function(a,b,c,d,e,f){
  a = ifelse(is.na(a)==TRUE,0,a)
  b = ifelse(is.na(b)==TRUE,0,b)
  c = ifelse(is.na(c)==TRUE,0,c)
  d = ifelse(is.na(d)==TRUE,0,d)
  e = ifelse(is.na(e)==TRUE,0,e)
  f = ifelse(is.na(f)==TRUE,0,f)
  a+b+c+d+e+f
}

sum_narm <- function(x){
  sum(x,na.rm = TRUE)
}

mean_narm <- function(x){
  mean(x,na.rm = TRUE)
}
#Load the data
reg_data <- read.csv("Z:/Race_Prediction/Data/NTJ Short Paper/reg_data.csv")
load("X:/public/demographic_tax_data/LIHTC/Data/CI_boot_samples/sp_summaries/all.RData")
CI_data <- as.data.frame(diffs.eitc_unc.incbin.clust)
CI_data$age_bin <- as.integer(CI_data$agi.bin.filers)

#+++++++++++++++++++++++++++++++++++++++++++++++++++++
# Second stage EITC takeup validation graphs
#Test EITC
eitc <- as.data.frame(subset(reg_data,reg_data$filer==1))
eitc$eic_ind <- ifelse(eitc$eic>0 & is.na(eitc$eic)==FALSE,1,0)

#Define graph data
bisg_eitc_ind <- c(mean(subset(eitc$eic_ind,eitc$demographic==1)),
                   sum_narm(eitc$eic_ind*eitc$prob_b1)/sum_narm(eitc$prob_b1),
                   mean(subset(eitc$eic_ind,eitc$demographic==2)),
                   sum_narm(eitc$eic_ind*eitc$prob_b2)/sum_narm(eitc$prob_b2),
                   mean(subset(eitc$eic_ind,eitc$demographic==3)),
                   sum_narm(eitc$eic_ind*eitc$prob_b3)/sum_narm(eitc$prob_b3),
                   mean(subset(eitc$eic_ind,eitc$demographic==4)),
                   sum_narm(eitc$eic_ind*eitc$prob_b4)/sum_narm(eitc$prob_b4),
                   mean(subset(eitc$eic_ind,eitc$demographic==6)),
                   sum_narm(eitc$eic_ind*eitc$prob_b6)/sum_narm(eitc$prob_b6))

bisg_eitc_ind <- bisg_eitc_ind*100


label1 <- c("White","Black","ANHPI","Native","Hispanic")
label2 <- c("","","","","") 

#Graph EITC Take-up truth versus predicted
par(mfrow=c(1,2))

#Plots with error bars
library(gplots)

bisg_m <- matrix(data = bisg_eitc_ind, nrow=2, ncol=5)

CI_main <- as.data.frame(sum.eitc_takeup.clust)
CI_main <- subset(CI_main,CI_main$boot_draw_i!=5)
upper <- matrix(data = c(NA,CI_main$CI_high[1],NA,CI_main$CI_high[2],NA,CI_main$CI_high[3],NA,CI_main$CI_high[4],NA,CI_main$CI_high[5])*100, nrow=2, ncol=5)
lower <- matrix(data = c(NA,CI_main$CI_low[1],NA,CI_main$CI_low[2],NA,CI_main$CI_low[3],NA,CI_main$CI_low[4],NA,CI_main$CI_low[5])*100, nrow=2, ncol=5)

names <- c("White","Black","ANHPI","Native","Hispanic")

par(bg=NA)
bp <- barplot2(bisg_m, beside = TRUE, horiz = FALSE, 
               plot.ci = TRUE, ci.u = upper, ci.l = lower, ci.lwd=4, border="white",ci.color = "orange",
               names.arg = names, col = c("lightskyblue", "firebrick"), xlim = c(1, 15),
               ylim=c(0,85),main = "EITC Claiming", ylab="Percent", cex.names = 1.1, cex.main=1.2,
               yaxt="n")
abline(h=0,col="black")
axis(side=2, at=c(0,20,40,60,80),
     labels=c("0%","20%","40%","60%","80%"))
legend("topleft", c("Observed","BIFSG"), pch=15, 
       col=c("lightskyblue","firebrick"),
       bty="n",cex=1)


#+++++++++++++++++++++++++++++++++++++++++++++++++++++
# Second stage EITC dollar amount (conditional average) validation graphs

#Define graph data
bisg_eitc_usd <- c(mean_narm(subset(eitc$eic,eitc$demographic==1 & eitc$eic_ind==1)),
                   sum_narm(eitc$eic*eitc$prob_b1*eitc$eic_ind)/
                     sum_narm(eitc$prob_b1*eitc$eic_ind),
                   
                   mean_narm(subset(eitc$eic,eitc$demographic==2 & eitc$eic_ind==1)),
                   sum_narm(eitc$eic*eitc$prob_b2*eitc$eic_ind)/
                     sum_narm(eitc$prob_b2*eitc$eic_ind),
                   
                   mean_narm(subset(eitc$eic,eitc$demographic==3 & eitc$eic_ind==1)),
                   sum_narm(eitc$eic*eitc$prob_b3*eitc$eic_ind)/
                     sum_narm(eitc$prob_b3*eitc$eic_ind),
                   
                   mean_narm(subset(eitc$eic,eitc$demographic==4 & eitc$eic_ind==1)),
                   sum_narm(eitc$eic*eitc$prob_b4*eitc$eic_ind)/
                     sum_narm(eitc$prob_b4*eitc$eic_ind),

                   mean_narm(subset(eitc$eic,eitc$demographic==6 & eitc$eic_ind==1)),
                   sum_narm(eitc$eic*eitc$prob_b6*eitc$eic_ind)/
                     sum_narm(eitc$prob_b6*eitc$eic_ind))


label1 <- c("White","Black","ANHPI","Native","Hispanic")
label2 <- c("","","","","") 


#Plots with error bars
library(gplots)

bisg_m <- matrix(data = bisg_eitc_usd, nrow=2, ncol=5)

CI_main <- as.data.frame(sum.eitc_cond.clust)
CI_main <- subset(CI_main,CI_main$boot_draw_i!=5)
upper <- matrix(data = c(NA,CI_main$CI_high[1],NA,CI_main$CI_high[2],NA,CI_main$CI_high[3],NA,CI_main$CI_high[4],NA,CI_main$CI_high[5]), nrow=2, ncol=5)
lower <- matrix(data = c(NA,CI_main$CI_low[1],NA,CI_main$CI_low[2],NA,CI_main$CI_low[3],NA,CI_main$CI_low[4],NA,CI_main$CI_low[5]), nrow=2, ncol=5)

names <- c("White","Black","ANHPI","Native","Hispanic")

par(bg=NA)
bp <- barplot2(bisg_m, beside = TRUE, horiz = FALSE, 
               plot.ci = TRUE, ci.u = upper, ci.l = lower, ci.lwd=4, border="white",ci.color = "orange",
               names.arg = names, col = c("lightskyblue", "firebrick"), xlim = c(1, 15),
               ylim=c(0,5000),main = "EITC Conditional Average", ylab="US Dollars",cex.names = 1.1, cex.main=1.2,
               yaxt="n")
abline(h=0,col="black")
axis(side=2, at=c(0,1000,2000,3000,4000,5000),
     labels=c("$0","$1,000","$2,000","$3,000","$4,000","$5,000"))
legend("topleft", c("Observed","BIFSG"), pch=15, 
       col=c("lightskyblue","firebrick"),
       bty="n",cex=1)




#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#+#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Differences by AGI category: Non-White - White
#Test EITC
eitc <- as.data.frame(subset(reg_data,reg_data$filer==1))
eitc$eic_ind <- ifelse(eitc$eic>0 & is.na(eitc$eic)==FALSE,1,0)

#Define AGI categories

cutoffs <- rep(0,10)
for(i in 1:10){
  cutoffs[i] <- quantile(eitc$agi,(0.1*i))
}

eitc %<>%
  mutate(agi_cat = ifelse(agi<cutoffs[1],1,NA),
         agi_cat = ifelse(agi>=cutoffs[1] & agi<cutoffs[2],2,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[2] & agi<cutoffs[3],3,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[3] & agi<cutoffs[4],4,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[4] & agi<cutoffs[5],5,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[5] & agi<cutoffs[6],6,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[6] & agi<cutoffs[7],7,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[7] & agi<cutoffs[8],8,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[8] & agi<cutoffs[9],9,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[9],10,agi_cat))

#Difference in conditional means between black and white
true_val <- rep(0,10)
for(i in 1:10){
  true_val[i] <- mean(subset(eitc$eic,eitc$demographic==2 & eitc$agi_cat==i))-
                        mean(subset(eitc$eic,eitc$demographic==1 & eitc$agi_cat==i))
}

predicted <- rep(0,10)
for(i in 1:10){
  predicted[i] <- sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b2,eitc$agi_cat==i)/
                                 sum_narm(subset(eitc$prob_b2,eitc$agi_cat==i))) -
  sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b1,eitc$agi_cat==i)/
                     sum_narm(subset(eitc$prob_b1,eitc$agi_cat==i)))
}

CI_low <- subset(CI_data$CI_lower,CI_data$diff=="2_1")
CI_high <- subset(CI_data$CI_upper,CI_data$diff=="2_1")

xvals <- seq(0.1,1,0.1)

#Graphing colors
library(RColorBrewer)
clr <- brewer.pal(8,"Set1")

#Graph EITC Take-up truth versus predicted
par(mfrow=c(2,2))
par(bg=NA)

# Draw the two bar chart using above datasets
plot(xvals,true_val,ylim=c(0,2500),col="white",
     main="Black - White",
     xlab = "Sample Income Decile",
     ylab="U.S. Dollars",
     yaxt="n",xaxt="n",
     cex.lab=1.1, cex.main=1.3)
polygon(c(xvals,rev(xvals)),c(CI_low,rev(CI_high)),col="mistyrose",border=NA)
abline(h=0,col="gray",lty=2)
lines(xvals,true_val,col=clr[2],lwd=4)
lines(xvals,predicted,col= "firebrick",lwd=4)
axis(side=2, at=c(0,1000,2000),
     labels=c("$0","$1,000","$2,000"))
axis(side=1, at=c(0.2,0.4,0.6,0.8,1),
     labels=c("20%","40%","60%","80%","100%"))
legend("topleft", c("Observed","BIFSG","Bootstrapped CIs"), lwd=c(4,4,10), 
       col=c(clr[2],"firebrick","mistyrose"), 
       bty="n")




#Difference in conditional means between hispanic and white
true_val <- rep(0,10)
for(i in 1:10){
  true_val[i] <- mean(subset(eitc$eic,eitc$demographic==6 & eitc$agi_cat==i)) -
                        mean(subset(eitc$eic,eitc$demographic==1 & eitc$agi_cat==i))
}

predicted <- rep(0,10)
for(i in 1:10){
  predicted[i] <- sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b6,eitc$agi_cat==i)/
                             sum_narm(subset(eitc$prob_b6,eitc$agi_cat==i))) -
    sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b1,eitc$agi_cat==i)/
               sum_narm(subset(eitc$prob_b1,eitc$agi_cat==i)))
}

CI_low <- subset(CI_data$CI_lower,CI_data$diff=="6_1")
CI_high <- subset(CI_data$CI_upper,CI_data$diff=="6_1")

xvals <- seq(0.1,1,0.1)


# Draw the two bar chart using above datasets
plot(xvals,true_val,ylim=c(0,2500),col="white",
     main="Hispanic - White",
     xlab = "Sample Income Decile",
     ylab="U.S. Dollars",
     yaxt="n",xaxt="n",
     cex.lab=1.1, cex.main=1.3)
polygon(c(xvals,rev(xvals)),c(CI_low,rev(CI_high)),col="mistyrose",border=NA)
abline(h=0,col="gray",lty=2)
lines(xvals,true_val,col=clr[2],lwd=4)
lines(xvals,predicted,col= "firebrick",lwd=4)
axis(side=2, at=c(0,1000,2000),
     labels=c("$0","$1,000","$2,000"))
axis(side=1, at=c(0.2,0.4,0.6,0.8,1),
     labels=c("20%","40%","60%","80%","100%"))
legend("topleft", c("Observed","BIFSG","Bootstrapped CIs"), lwd=c(4,4,10), 
       col=c(clr[2],"firebrick","mistyrose"), 
       bty="n")



#Difference in conditional means between ANHPI and white
true_val <- rep(0,10)
for(i in 1:10){
  true_val[i] <- mean(subset(eitc$eic,eitc$demographic==3 & eitc$agi_cat==i)) -
                        mean(subset(eitc$eic,eitc$demographic==1 & eitc$agi_cat==i))
}

predicted <- rep(0,10)
for(i in 1:10){
  predicted[i] <- sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b3,eitc$agi_cat==i)/
                             sum_narm(subset(eitc$prob_b3,eitc$agi_cat==i))) -
    sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b1,eitc$agi_cat==i)/
               sum_narm(subset(eitc$prob_b1,eitc$agi_cat==i)))
}

CI_low <- subset(CI_data$CI_lower,CI_data$diff=="3_1")
CI_high <- subset(CI_data$CI_upper,CI_data$diff=="3_1")

xvals <- seq(0.1,1,0.1)



# Draw the two bar chart using above datasets
plot(xvals,true_val,col="white", ylim=c(-750,1000),
     main="ANHPI - White",
     xlab = "Sample Income Decile",
     ylab="U.S. Dollars",
     yaxt="n",xaxt="n",
     cex.lab=1.1, cex.main=1.3)
polygon(c(xvals,rev(xvals)),c(CI_low,rev(CI_high)),col="mistyrose",border=NA)
abline(h=0,col="gray",lty=2)
lines(xvals,true_val,col=clr[2],lwd=4)
lines(xvals,predicted,col= "firebrick",lwd=4)
axis(side=2, at=c(-500,0,500,1000,1500,2000),
     labels=c("-$500","$0","$500","$1,000","$1,500","$2,000"))
axis(side=1, at=c(0.2,0.4,0.6,0.8,1),
     labels=c("20%","40%","60%","80%","100%"))
legend("topleft", c("Observed","BIFSG","Bootstrapped CIs"), lwd=c(4,4,10), 
       col=c(clr[2],"firebrick","mistyrose"), 
       bty="n")



#Difference in conditional means between Native and white
true_val <- rep(0,10)
for(i in 1:10){
  true_val[i] <- mean(subset(eitc$eic,eitc$demographic==4 & eitc$agi_cat==i)) -
                        mean(subset(eitc$eic,eitc$demographic==1 & eitc$agi_cat==i))
}

predicted <- rep(0,10)
for(i in 1:10){
  predicted[i] <- sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b4,eitc$agi_cat==i)/
                             sum_narm(subset(eitc$prob_b4,eitc$agi_cat==i))) -
    sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b1,eitc$agi_cat==i)/
               sum_narm(subset(eitc$prob_b1,eitc$agi_cat==i)))
}

CI_low <- subset(CI_data$CI_lower,CI_data$diff=="4_1")
CI_high <- subset(CI_data$CI_upper,CI_data$diff=="4_1")

xvals <- seq(0.1,1,0.1)


# Draw the two bar chart using above datasets
plot(xvals,true_val,col="white", ylim=c(0,2500),
     main="Native - White",
     xlab = "Sample Income Decile",
     ylab="U.S. Dollars",
     yaxt="n",xaxt="n",
     cex.lab=1.1, cex.main=1.3)
polygon(c(xvals,rev(xvals)),c(CI_low,rev(CI_high)),col="mistyrose",border=NA)
abline(h=0,col="gray",lty=2)
lines(xvals,true_val,col=clr[2],lwd=4)
lines(xvals,predicted,col= "firebrick",lwd=4)
axis(side=2, at=c(0,1000,2000),
     labels=c("$0","$1,000","$2,000"))
axis(side=1, at=c(0.2,0.4,0.6,0.8,1),
     labels=c("20%","40%","60%","80%","100%"))
legend("topleft", c("Observed","BIFSG","Bootstrapped CIs"), lwd=c(4,4,10), 
       col=c(clr[2],"firebrick","mistyrose"), 
       bty="n")


#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Differences by AGI category: Non-White compared to Non-White
#Test EITC
eitc <- as.data.frame(subset(reg_data,reg_data$filer==1))

#Define AGI categories
cutoffs <- rep(0,10)
for(i in 1:10){
  cutoffs[i] <- quantile(eitc$agi,(0.1*i))
}

eitc %<>%
  mutate(agi_cat = ifelse(agi<cutoffs[1],1,NA),
         agi_cat = ifelse(agi>=cutoffs[1] & agi<cutoffs[2],2,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[2] & agi<cutoffs[3],3,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[3] & agi<cutoffs[4],4,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[4] & agi<cutoffs[5],5,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[5] & agi<cutoffs[6],6,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[6] & agi<cutoffs[7],7,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[7] & agi<cutoffs[8],8,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[8] & agi<cutoffs[9],9,agi_cat),
         agi_cat = ifelse(agi>=cutoffs[9],10,agi_cat))


#++++++++++++ Graph other differences against each other
#Difference in conditional means between Black and Hispanic
true_val <- rep(0,10)
for(i in 1:10){
  true_val[i] <- mean(subset(eitc$eic,eitc$demographic==2 & eitc$agi_cat==i)) -
                        mean(subset(eitc$eic,eitc$demographic==6 & eitc$agi_cat==i))
}

predicted <- rep(0,10)
for(i in 1:10){
  predicted[i] <- sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b2,eitc$agi_cat==i)/
                             sum_narm(subset(eitc$prob_b2,eitc$agi_cat==i))) -
    sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b6,eitc$agi_cat==i)/
               sum_narm(subset(eitc$prob_b6,eitc$agi_cat==i)))
}

CI_low <- -1*subset(CI_data$CI_lower,CI_data$diff=="6_2")
CI_high <- -1*subset(CI_data$CI_upper,CI_data$diff=="6_2")

xvals <- seq(0.1,1,0.1)

#Graph EITC Take-up truth versus predicted
par(mfrow=c(3,2))
par(bg=NA)

# Draw the two bar chart using above datasets
plot(xvals,true_val,ylim=c(-500,1500),col="white",
     main="Black - Hispanic",
     xlab = "Sample Income Decile",
     ylab="U.S. Dollars",
     yaxt="n",xaxt="n",
     cex.lab=1.3, cex.main=1.5)
polygon(c(xvals,rev(xvals)),c(CI_low,rev(CI_high)),col="mistyrose",border=NA)
abline(h=0,col="gray",lty=2)
lines(xvals,true_val,col=clr[2],lwd=4)
lines(xvals,predicted,col= "firebrick",lwd=4)
axis(side=2, at=c(-1000,-500,0,500,1000,1500,2000),
     labels=c("-$1,000","-$500","$0","$500","$1,000","$1,500","$2,000"))
axis(side=1, at=c(0.2,0.4,0.6,0.8,1),
     labels=c("20%","40%","60%","80%","100%"))
legend("topleft", c("Observed","BIFSG","Bootstrapped CIs"), lwd=c(4,4,10), 
       col=c(clr[2],"firebrick","mistyrose"), 
       bty="n")




#Difference in conditional means between Black and Asian
true_val <- rep(0,10)
for(i in 1:10){
  true_val[i] <- mean(subset(eitc$eic,eitc$demographic==2 & eitc$agi_cat==i)) -
                        mean(subset(eitc$eic,eitc$demographic==3 & eitc$agi_cat==i))
}

predicted <- rep(0,10)
for(i in 1:10){
  predicted[i] <- sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b2,eitc$agi_cat==i)/
                             sum_narm(subset(eitc$prob_b2,eitc$agi_cat==i))) -
    sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b3,eitc$agi_cat==i)/
               sum_narm(subset(eitc$prob_b3,eitc$agi_cat==i)))
}

CI_low <- -1*subset(CI_data$CI_lower,CI_data$diff=="3_2")
CI_high <- -1*subset(CI_data$CI_upper,CI_data$diff=="3_2")

xvals <- seq(0.1,1,0.1)

# Draw the two bar chart using above datasets
plot(xvals,true_val,ylim=c(-500,2500),col="white",
     main="Black - ANHPI",
     xlab = "Sample Income Decile",
     ylab="U.S. Dollars",
     yaxt="n",xaxt="n",
     cex.lab=1.3, cex.main=1.5)
polygon(c(xvals,rev(xvals)),c(CI_low,rev(CI_high)),col="mistyrose",border=NA)
abline(h=0,col="gray",lty=2)
lines(xvals,true_val,col=clr[2],lwd=4)
lines(xvals,predicted,col= "firebrick",lwd=4)
axis(side=2, at=c(-1000,-500,0,500,1000,1500,2000,2500),
     labels=c("-$1,000","-$500","$0","$500","$1,000","$1,500","$2,000","$2,500"))
axis(side=1, at=c(0.2,0.4,0.6,0.8,1),
     labels=c("20%","40%","60%","80%","100%"))
legend("topleft", c("Observed","BIFSG","Bootstrapped CIs"), lwd=c(4,4,10), 
       col=c(clr[2],"firebrick","mistyrose"), 
       bty="n")


#Difference in conditional means between Hispanic and Asian
true_val <- rep(0,10)
for(i in 1:10){
  true_val[i] <- mean(subset(eitc$eic,eitc$demographic==6 & eitc$agi_cat==i)) -
                        mean(subset(eitc$eic,eitc$demographic==3 & eitc$agi_cat==i))
}

predicted <- rep(0,10)
for(i in 1:10){
  predicted[i] <- sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b6,eitc$agi_cat==i)/
                             sum_narm(subset(eitc$prob_b6,eitc$agi_cat==i))) -
    sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b3,eitc$agi_cat==i)/
               sum_narm(subset(eitc$prob_b3,eitc$agi_cat==i)))
}

CI_low <- subset(CI_data$CI_lower,CI_data$diff=="6_3")
CI_high <-subset(CI_data$CI_upper,CI_data$diff=="6_3")

xvals <- seq(0.1,1,0.1)


# Draw the two bar chart using above datasets
plot(xvals,true_val,ylim=c(-500,1500),col="white",
     main="Hispanic - ANHPI",
     xlab = "Sample Income Decile",
     ylab="U.S. Dollars",
     yaxt="n",xaxt="n",
     cex.lab=1.3, cex.main=1.5)
polygon(c(xvals,rev(xvals)),c(CI_low,rev(CI_high)),col="mistyrose",border=NA)
abline(h=0,col="gray",lty=2)
lines(xvals,true_val,col=clr[2],lwd=4)
lines(xvals,predicted,col= "firebrick",lwd=4)
axis(side=2, at=c(-1000,-500,0,500,1000,1500,2000),
     labels=c("-$1,000","-$500","$0","$500","$1,000","$1,500","$2,000"))
axis(side=1, at=c(0.2,0.4,0.6,0.8,1),
     labels=c("20%","40%","60%","80%","100%"))
legend("topleft", c("Observed","BIFSG","Bootstrapped CIs"), lwd=c(4,4,10), 
       col=c(clr[2],"firebrick","mistyrose"), 
       bty="n")




#Difference in conditional means between Native and Black
true_val <- rep(0,10)
for(i in 1:10){
  true_val[i] <- mean(subset(eitc$eic,eitc$demographic==4 & eitc$agi_cat==i)) -
                        mean(subset(eitc$eic,eitc$demographic==2 & eitc$agi_cat==i))
}

predicted <- rep(0,10)
for(i in 1:10){
  predicted[i] <- sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b4,eitc$agi_cat==i)/
                             sum_narm(subset(eitc$prob_b4,eitc$agi_cat==i))) -
    sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b2,eitc$agi_cat==i)/
               sum_narm(subset(eitc$prob_b2,eitc$agi_cat==i)))
}

CI_low <- subset(CI_data$CI_lower,CI_data$diff=="4_2")
CI_high <- subset(CI_data$CI_upper,CI_data$diff=="4_2")

xvals <- seq(0.1,1,0.1)

# Draw the two bar chart using above datasets
plot(xvals,true_val,ylim=c(-1250,1000),col="white",
     main="Native - Black",
     xlab = "Sample Income Decile",
     ylab="U.S. Dollars",
     yaxt="n",xaxt="n",
     cex.lab=1.3, cex.main=1.5)
polygon(c(xvals,rev(xvals)),c(CI_low,rev(CI_high)),col="mistyrose",border=NA)
abline(h=0,col="gray",lty=2)
lines(xvals,true_val,col=clr[2],lwd=4)
lines(xvals,predicted,col= "firebrick",lwd=4)
axis(side=2, at=c(-1000,-500,0,500,1000,1500,2000),
     labels=c("-$1,000","-$500","$0","$500","$1,000","$1,500","$2,000"))
axis(side=1, at=c(0.2,0.4,0.6,0.8,1),
     labels=c("20%","40%","60%","80%","100%"))
legend("topleft", c("Observed","BIFSG","Bootstrapped CIs"), lwd=c(4,4,10), 
       col=c(clr[2],"firebrick","mistyrose"), 
       bty="n")


#Difference in conditional means between Native and Asian
true_val <- rep(0,10)
for(i in 1:10){
  true_val[i] <- mean(subset(eitc$eic,eitc$demographic==4 & eitc$agi_cat==i)) -
                        mean(subset(eitc$eic,eitc$demographic==3 & eitc$agi_cat==i))
}

predicted <- rep(0,10)
for(i in 1:10){
  predicted[i] <- sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b4,eitc$agi_cat==i)/
                             sum_narm(subset(eitc$prob_b4,eitc$agi_cat==i))) -
    sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b3,eitc$agi_cat==i)/
               sum_narm(subset(eitc$prob_b3,eitc$agi_cat==i)))
}

CI_low <- subset(CI_data$CI_lower,CI_data$diff=="4_3")
CI_high <- subset(CI_data$CI_upper,CI_data$diff=="4_3")

xvals <- seq(0.1,1,0.1)


# Draw the two bar chart using above datasets
plot(xvals,true_val,ylim=c(-250,2000),col="white",
     main="Native - ANHPI",
     xlab = "Sample Income Decile",
     ylab="U.S. Dollars",
     yaxt="n",xaxt="n",
     cex.lab=1.3, cex.main=1.5)
polygon(c(xvals,rev(xvals)),c(CI_low,rev(CI_high)),col="mistyrose",border=NA)
abline(h=0,col="gray",lty=2)
lines(xvals,true_val,col=clr[2],lwd=4)
lines(xvals,predicted,col= "firebrick",lwd=4)
axis(side=2, at=c(-1000,-500,0,500,1000,1500,2000),
     labels=c("-$1,000","-$500","$0","$500","$1,000","$1,500","$2,000"))
axis(side=1, at=c(0.2,0.4,0.6,0.8,1),
     labels=c("20%","40%","60%","80%","100%"))
legend("topleft", c("Observed","BIFSG","Bootstrapped CIs"), lwd=c(4,4,10), 
       col=c(clr[2],"firebrick","mistyrose"), 
       bty="n")


#Difference in conditional means between Native and Hispanic
true_val <- rep(0,10)
for(i in 1:10){
  true_val[i] <- mean(subset(eitc$eic,eitc$demographic==4 & eitc$agi_cat==i)) -
                        mean(subset(eitc$eic,eitc$demographic==6 & eitc$agi_cat==i))
}

predicted <- rep(0,10)
for(i in 1:10){
  predicted[i] <- sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b4,eitc$agi_cat==i)/
                             sum_narm(subset(eitc$prob_b4,eitc$agi_cat==i))) -
    sum_narm(subset(eitc$eic,eitc$agi_cat==i)*subset(eitc$prob_b6,eitc$agi_cat==i)/
               sum_narm(subset(eitc$prob_b6,eitc$agi_cat==i)))
}

CI_low <- -1*subset(CI_data$CI_lower,CI_data$diff=="6_4")
CI_high <- -1*subset(CI_data$CI_upper,CI_data$diff=="6_4")

xvals <- seq(0.1,1,0.1)


# Draw the two bar chart using above datasets
plot(xvals,true_val,ylim=c(-750,1500),col="white",
     main="Native - Hispanic",
     xlab = "Sample Income Decile",
     ylab="U.S. Dollars",
     yaxt="n",xaxt="n",
     cex.lab=1.3, cex.main=1.5)
polygon(c(xvals,rev(xvals)),c(CI_low,rev(CI_high)),col="mistyrose",border=NA)
abline(h=0,col="gray",lty=2)
lines(xvals,true_val,col=clr[2],lwd=4)
lines(xvals,predicted,col= "firebrick",lwd=4)
axis(side=2, at=c(-1000,-500,0,500,1000,1500,2000),
     labels=c("-$1,000","-$500","$0","$500","$1,000","$1,500","$2,000"))
axis(side=1, at=c(0.2,0.4,0.6,0.8,1),
     labels=c("20%","40%","60%","80%","100%"))
legend("topleft", c("Observed","BIFSG","Bootstrapped CIs"), lwd=c(4,4,10), 
       col=c(clr[2],"firebrick","mistyrose"), 
       bty="n")


